
##########################################################################################################
##      Wen, Fangqi, Erik H. Wang, and Michael Hout. 2024.                                              ##
##      "Social Mobility in the Tang Dynasty as the Imperial Examination Rose and                       ##
##      Aristocratic Family Pedigree Declined, 618-907 CE."                                             ##
##      Proceedings of the National Academy of Sciences 121(4): e2305564121.                            ##
##      Replication Codes                                                                               ##
##      Jan 18, 2024                                                                                    ##
##########################################################################################################


rm(list=ls())
library(ggpubr)
library(ggplot2)
library(fixest)
library(gridExtra)

## please set your directory right

load("PNAS_Tang_Mobility_Replication.RData")


######################################
### Supplementary Information (SI) ###
######################################

### SI Section 2: Effects of Family Background on Exam Success
plot_model <- feols(Exam ~ branch + 
                      branch:decade +
                      branch:I(decade^2) + 
                      FRank +
                      FRank:decade + 
                      FRank:I(decade^2) +
                      GPRank +
                      GPRank:decade + 
                      GPRank:I(decade^2) +
                      MarriageFull + 
                      MarriageFull:decade +
                      MarriageFull:I(decade^2) +
                      Bieji | Modern.province^decade +
                      Pre.TangRegion_alt^decade,
                    data = People, cluster = c("decade"))    
results_plot_model <- summary(plot_model)

## get the x01 for the x-axis values in plots
x01 <- min(unique(People$decade), na.rm = T):
  max(unique(People$decade), na.rm = T)

## Branch Effect Over Time
dy.dx1 <- results_plot_model$coefficients["branch"] +
  results_plot_model$coefficients["branch:decade"] * x01 +
  results_plot_model$coefficients["branch:I(decade^2)"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["branch", "branch"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["branch:decade", "branch:decade"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["branch:I(decade^2)", "branch:I(decade^2)"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["branch:decade", "branch"] * 2*x01 +
                    results_plot_model$cov.scaled["branch", "branch:I(decade^2)"] * 2*x01^2 +
                    results_plot_model$cov.scaled["branch:decade", "branch:I(decade^2)"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

branch_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
branch_effect$variable <- "branch"

plot_branch <- ggplot(data=branch_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Prominent Branch") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-0.5, 0.52),
                     breaks = c(-0.4, -0.2, 0, 0.2, 0.4)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_branch


### getting father effects in the same model
dy.dx1 <- results_plot_model$coefficients["FRank"] +
  results_plot_model$coefficients["decade:FRank"] * x01 +
  results_plot_model$coefficients["I(decade^2):FRank"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["FRank", "FRank"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["decade:FRank", "decade:FRank"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["I(decade^2):FRank", "I(decade^2):FRank"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["decade:FRank", "FRank"] * 2*x01 +
                    results_plot_model$cov.scaled["FRank", "I(decade^2):FRank"] * 2*x01^2 +
                    results_plot_model$cov.scaled["decade:FRank", "I(decade^2):FRank"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

FatherRank_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
FatherRank_effect$variable <- "FatherRank"

plot_FatherRank <- ggplot(data=FatherRank_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Father's Rank") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-0.025, 0.025),
                     breaks = c(-0.02, 0, 0.02)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_FatherRank


### getting grandpa effects in the same model
dy.dx1 <- results_plot_model$coefficients["GPRank"] +
  results_plot_model$coefficients["decade:GPRank"] * x01 +
  results_plot_model$coefficients["I(decade^2):GPRank"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["GPRank", "GPRank"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["decade:GPRank", "decade:GPRank"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["I(decade^2):GPRank", "I(decade^2):GPRank"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["decade:GPRank", "GPRank"] * 2*x01 +
                    results_plot_model$cov.scaled["GPRank", "I(decade^2):GPRank"] * 2*x01^2 +
                    results_plot_model$cov.scaled["decade:GPRank", "I(decade^2):GPRank"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

GrandpaRank_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
GrandpaRank_effect$variable <- "GrandpaRank"

plot_GrandpaRank <- ggplot(data=GrandpaRank_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Grandpa's Rank") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-0.025, 0.025),
                     breaks = c(-0.02, 0, 0.02)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_GrandpaRank


### getting marriage network effects in the same model
dy.dx1 <- results_plot_model$coefficients["MarriageFull"] +
  results_plot_model$coefficients["decade:MarriageFull"] * x01 +
  results_plot_model$coefficients["I(decade^2):MarriageFull"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["MarriageFull", "MarriageFull"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["decade:MarriageFull", "decade:MarriageFull"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["I(decade^2):MarriageFull", "I(decade^2):MarriageFull"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["decade:MarriageFull", "MarriageFull"] * 2*x01 +
                    results_plot_model$cov.scaled["MarriageFull", "I(decade^2):MarriageFull"] * 2*x01^2 +
                    results_plot_model$cov.scaled["decade:MarriageFull", "I(decade^2):MarriageFull"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

MarriageFull_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
MarriageFull_effect$variable <- "MarriageFull"

plot_MarriageFull <- ggplot(data=MarriageFull_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Elite Marriage Networks") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-0.5, 0.52),
                     breaks = c(-0.4, -0.2, 0, 0.2, 0.4)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_MarriageFull


grid.arrange(plot_branch, plot_MarriageFull, plot_FatherRank, plot_GrandpaRank, nrow=2)



#### SI Section 3: Effect of Elite Marriage Networks on Office Rank

plot_model <- feols(Rank_New ~ branch + 
                      branch:decade +
                      branch:I(decade^2) + 
                      Exam + 
                      Exam:decade +
                      Exam:I(decade^2) +
                      FRank +
                      FRank:decade + 
                      FRank:I(decade^2) +
                      GPRank +
                      GPRank:decade + 
                      GPRank:I(decade^2) +
                      MarriageFull + 
                      MarriageFull:decade +
                      MarriageFull:I(decade^2) +
                      Bieji | Modern.province^decade +
                      Pre.TangRegion_alt^decade,
                    data = People, cluster = c("decade"))    
results_plot_model <- summary(plot_model)

## get the x01 for the x-axis values in plots
x01 <- min(unique(People$decade), na.rm = T):
  max(unique(People$decade), na.rm = T)

### getting marriage network effects in the same model
dy.dx1 <- results_plot_model$coefficients["MarriageFull"] +
  results_plot_model$coefficients["decade:MarriageFull"] * x01 +
  results_plot_model$coefficients["I(decade^2):MarriageFull"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["MarriageFull", "MarriageFull"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["decade:MarriageFull", "decade:MarriageFull"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["I(decade^2):MarriageFull", "I(decade^2):MarriageFull"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["decade:MarriageFull", "MarriageFull"] * 2*x01 +
                    results_plot_model$cov.scaled["MarriageFull", "I(decade^2):MarriageFull"] * 2*x01^2 +
                    results_plot_model$cov.scaled["decade:MarriageFull", "I(decade^2):MarriageFull"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

MarriageFull_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
MarriageFull_effect$variable <- "MarriageFull"

plot_MarriageFull <- ggplot(data=MarriageFull_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Elite Marriage Networks") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  #  scale_y_continuous(limits = c(-0.5, 0.5),
  #                     breaks = c(-0.4, -0.2, 0, 0.2, 0.4)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_MarriageFull



##### SI Section 4: Effects of Key Independent Variables on Office Rank Over Time 
#####               (Removing Men Who Died Before Age 20)

People_20_Older <- People[which(People$Died_after20 == 1), ]

plot_model <- feols(Rank_New ~ branch + 
                      branch:decade +
                      branch:I(decade^2) + 
                      Exam + 
                      Exam:decade +
                      Exam:I(decade^2) +
                      FRank +
                      FRank:decade + 
                      FRank:I(decade^2) +
                      GPRank +
                      GPRank:decade + 
                      GPRank:I(decade^2) +
                      MarriageFull + 
                      MarriageFull:decade +
                      MarriageFull:I(decade^2) +
                      Bieji | Modern.province^decade +
                      Pre.TangRegion_alt^decade,
                    data = People_20_Older, cluster = c("decade"))    
results_plot_model <- summary(plot_model)

## get the x01 for the x-axis values in plots
x01 <- min(unique(People_20_Older$decade), na.rm = T):
  max(unique(People_20_Older$decade), na.rm = T)

## Branch Effect Over Time
dy.dx1 <- results_plot_model$coefficients["branch"] +
  results_plot_model$coefficients["branch:decade"] * x01 +
  results_plot_model$coefficients["branch:I(decade^2)"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["branch", "branch"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["branch:decade", "branch:decade"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["branch:I(decade^2)", "branch:I(decade^2)"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["branch:decade", "branch"] * 2*x01 +
                    results_plot_model$cov.scaled["branch", "branch:I(decade^2)"] * 2*x01^2 +
                    results_plot_model$cov.scaled["branch:decade", "branch:I(decade^2)"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

branch_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
branch_effect$variable <- "branch"

plot_branch <- ggplot(data=branch_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Prominent Branch") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-3.5, 8),
                     breaks = c(-2, 0, 2, 4, 6, 8)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_branch


## Exam Effect Over Time
dy.dx1 <- results_plot_model$coefficients["Exam"] +
  results_plot_model$coefficients["decade:Exam"] * x01 +
  results_plot_model$coefficients["I(decade^2):Exam"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["Exam", "Exam"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["decade:Exam", "decade:Exam"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["I(decade^2):Exam", "I(decade^2):Exam"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["decade:Exam", "Exam"] * 2*x01 +
                    results_plot_model$cov.scaled["Exam", "I(decade^2):Exam"] * 2*x01^2 +
                    results_plot_model$cov.scaled["decade:Exam", "I(decade^2):Exam"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

exam_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
exam_effect$variable <- "exam"

plot_exam <- ggplot(data=exam_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Keju") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-3.5, 8),
                     breaks = c(-2, 0, 2, 4, 6, 8)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_exam

### getting father effects in the same model
dy.dx1 <- results_plot_model$coefficients["FRank"] +
  results_plot_model$coefficients["decade:FRank"] * x01 +
  results_plot_model$coefficients["I(decade^2):FRank"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["FRank", "FRank"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["decade:FRank", "decade:FRank"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["I(decade^2):FRank", "I(decade^2):FRank"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["decade:FRank", "FRank"] * 2*x01 +
                    results_plot_model$cov.scaled["FRank", "I(decade^2):FRank"] * 2*x01^2 +
                    results_plot_model$cov.scaled["decade:FRank", "I(decade^2):FRank"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

FatherRank_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
FatherRank_effect$variable <- "FatherRank"

plot_FatherRank <- ggplot(data=FatherRank_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Father's Rank") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-0.2, 0.6),
                     breaks = c(0, 0.2, 0.4)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_FatherRank


grid.arrange(plot_branch, plot_exam, plot_FatherRank, nrow=1)



#### SI Section 5: Effects of Key Independent Variables on Office Rank Over Time 
####               (Removing Time Slices Oversampled)

## slice_1 "men died between 650 and 660 CE"
## slice_2 "men died between 720 and 730 CE"
## slice_3 "men died between 800 and 810 CE"

No_Slices <- subset(People, People$slice_1 != 1 & People$slice_2 != 1 & People$slice_3 != 1)

######
plot_model <- feols(Rank_New ~ branch + 
                      branch:decade +
                      branch:I(decade^2) + 
                      Exam + 
                      Exam:decade +
                      Exam:I(decade^2) +
                      FRank +
                      FRank:decade + 
                      FRank:I(decade^2) +
                      GPRank +
                      GPRank:decade + 
                      GPRank:I(decade^2) +
                      MarriageFull + 
                      MarriageFull:decade +
                      MarriageFull:I(decade^2) +
                      Bieji | Modern.province^decade +
                      Pre.TangRegion_alt^decade,
                    data = No_Slices, cluster = c("decade"))    
results_plot_model <- summary(plot_model)

## get the x01 for the x-axis values in plots
x01 <- min(unique(No_Slices$decade), na.rm = T):
  max(unique(No_Slices$decade), na.rm = T)

## Branch Effect Over Time
dy.dx1 <- results_plot_model$coefficients["branch"] +
  results_plot_model$coefficients["branch:decade"] * x01 +
  results_plot_model$coefficients["branch:I(decade^2)"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["branch", "branch"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["branch:decade", "branch:decade"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["branch:I(decade^2)", "branch:I(decade^2)"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["branch:decade", "branch"] * 2*x01 +
                    results_plot_model$cov.scaled["branch", "branch:I(decade^2)"] * 2*x01^2 +
                    results_plot_model$cov.scaled["branch:decade", "branch:I(decade^2)"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

branch_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
branch_effect$variable <- "branch"

plot_branch <- ggplot(data=branch_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Prominent Branch") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-3.2, 8),
                     breaks = c(-2, 0, 2, 4, 6, 8)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_branch


## Exam Effect Over Time
dy.dx1 <- results_plot_model$coefficients["Exam"] +
  results_plot_model$coefficients["decade:Exam"] * x01 +
  results_plot_model$coefficients["I(decade^2):Exam"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["Exam", "Exam"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["decade:Exam", "decade:Exam"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["I(decade^2):Exam", "I(decade^2):Exam"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["decade:Exam", "Exam"] * 2*x01 +
                    results_plot_model$cov.scaled["Exam", "I(decade^2):Exam"] * 2*x01^2 +
                    results_plot_model$cov.scaled["decade:Exam", "I(decade^2):Exam"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

exam_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
exam_effect$variable <- "exam"

plot_exam <- ggplot(data=exam_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Keju") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-6, 8),
                     breaks = c(-2, 0, 2, 4, 6, 8)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_exam

### getting father effects in the same model
dy.dx1 <- results_plot_model$coefficients["FRank"] +
  results_plot_model$coefficients["decade:FRank"] * x01 +
  results_plot_model$coefficients["I(decade^2):FRank"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["FRank", "FRank"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["decade:FRank", "decade:FRank"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["I(decade^2):FRank", "I(decade^2):FRank"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["decade:FRank", "FRank"] * 2*x01 +
                    results_plot_model$cov.scaled["FRank", "I(decade^2):FRank"] * 2*x01^2 +
                    results_plot_model$cov.scaled["decade:FRank", "I(decade^2):FRank"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

FatherRank_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
FatherRank_effect$variable <- "FatherRank"

plot_FatherRank <- ggplot(data=FatherRank_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Father's Rank") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-0.2, 0.6),
                     breaks = c(0, 0.2, 0.4)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_FatherRank


grid.arrange(plot_branch, plot_exam, plot_FatherRank, nrow=1)



#### SI Section 6: Effects of Key Independent Variables on Office Rank Over Time 
####               (Removing Men Who Came From Families That Produced Chief Ministers)

## CMin_Clan : Men Who Came From Families That Produced Chief Ministers

Pre_Empress <- subset(People, After_Empress==0)
Post_Empress <- subset(People, After_Empress==1)


full <- People[, c("Rank_New", "branch", "Exam", "FRank", "GPRank", "MarriageFull", 
                   "Bieji", "Modern.province", "Pre.TangRegion_alt", "decade", "CMin_Clan")]
Pre <- Pre_Empress[, c("Rank_New", "branch", "Exam", "FRank", "GPRank", "MarriageFull", 
                       "Bieji", "Modern.province", "Pre.TangRegion_alt", "decade", "CMin_Clan")]
Post <- Post_Empress[, c("Rank_New", "branch", "Exam", "FRank", "GPRank", "MarriageFull", 
                         "Bieji", "Modern.province", "Pre.TangRegion_alt", "decade", "CMin_Clan")]



m1 <- feols(Rank_New ~ branch + Exam + FRank + GPRank + MarriageFull +
              Bieji  | Modern.province^decade +
              Pre.TangRegion_alt^decade, 
            data = subset(Pre, Pre$CMin_Clan==0))
m1_clse <- summary(m1, cluster = c("decade"))
summary(m1_clse)

m2 <- feols(Rank_New ~ branch + Exam + FRank + GPRank + MarriageFull +
              Bieji  | Modern.province^decade +
              Pre.TangRegion_alt^decade, 
            data = subset(Post, Post$CMin_Clan==0))
m2_clse <- summary(m2, cluster = c("decade"))
summary(m2_clse)

m3 <- feols(Rank_New ~ branch + Exam + FRank + GPRank + MarriageFull +
              Bieji  | Modern.province^decade +
              Pre.TangRegion_alt^decade, 
            data = subset(People, People$CMin_Clan==0))
m3_clse <- summary(m3, cluster = c("decade"))
summary(m3_clse)

m4 <- feols(Rank_New ~ branch*Exam + FRank*Exam + GPRank + MarriageFull +
              Bieji  | Modern.province^decade +
              Pre.TangRegion_alt^decade, 
            data = subset(People, People$CMin_Clan==0))
m4_clse <- summary(m4, cluster = c("decade"))
summary(m4_clse)

etable(m1_clse, m2_clse, m3_clse, m4_clse, digits = 3,
       keep=c("branch", "Exam", "FRank"),
       signif.code=c("***"=0.001, "**"=0.01, "*"=0.05, "+"=0.1))



No_CMin <- subset(People, People$CMin_Clan==0)

######
plot_model <- feols(Rank_New ~ branch + 
                      branch:decade +
                      branch:I(decade^2) + 
                      Exam + 
                      Exam:decade +
                      Exam:I(decade^2) +
                      FRank +
                      FRank:decade + 
                      FRank:I(decade^2) +
                      GPRank +
                      GPRank:decade + 
                      GPRank:I(decade^2) +
                      MarriageFull + 
                      MarriageFull:decade +
                      MarriageFull:I(decade^2) +
                      Bieji | Modern.province^decade +
                      Pre.TangRegion_alt^decade,
                    data = No_CMin, cluster = c("decade"))    
results_plot_model <- summary(plot_model)

## get the x01 for the x-axis values in plots
x01 <- min(unique(No_CMin$decade), na.rm = T):
  max(unique(No_CMin$decade), na.rm = T)

## Branch Effect Over Time
dy.dx1 <- results_plot_model$coefficients["branch"] +
  results_plot_model$coefficients["branch:decade"] * x01 +
  results_plot_model$coefficients["branch:I(decade^2)"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["branch", "branch"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["branch:decade", "branch:decade"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["branch:I(decade^2)", "branch:I(decade^2)"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["branch:decade", "branch"] * 2*x01 +
                    results_plot_model$cov.scaled["branch", "branch:I(decade^2)"] * 2*x01^2 +
                    results_plot_model$cov.scaled["branch:decade", "branch:I(decade^2)"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

branch_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
branch_effect$variable <- "branch"

plot_branch <- ggplot(data=branch_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Prominent Branch") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-3.2, 8.4),
                     breaks = c(-2, 0, 2, 4, 6, 8)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_branch


## Exam Effect Over Time
dy.dx1 <- results_plot_model$coefficients["Exam"] +
  results_plot_model$coefficients["decade:Exam"] * x01 +
  results_plot_model$coefficients["I(decade^2):Exam"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["Exam", "Exam"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["decade:Exam", "decade:Exam"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["I(decade^2):Exam", "I(decade^2):Exam"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["decade:Exam", "Exam"] * 2*x01 +
                    results_plot_model$cov.scaled["Exam", "I(decade^2):Exam"] * 2*x01^2 +
                    results_plot_model$cov.scaled["decade:Exam", "I(decade^2):Exam"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

exam_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
exam_effect$variable <- "exam"

plot_exam <- ggplot(data=exam_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Keju") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-6, 8),
                     breaks = c(-2, 0, 2, 4, 6, 8)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_exam

### getting father effects in the same model
dy.dx1 <- results_plot_model$coefficients["FRank"] +
  results_plot_model$coefficients["decade:FRank"] * x01 +
  results_plot_model$coefficients["I(decade^2):FRank"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["FRank", "FRank"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["decade:FRank", "decade:FRank"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["I(decade^2):FRank", "I(decade^2):FRank"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["decade:FRank", "FRank"] * 2*x01 +
                    results_plot_model$cov.scaled["FRank", "I(decade^2):FRank"] * 2*x01^2 +
                    results_plot_model$cov.scaled["decade:FRank", "I(decade^2):FRank"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

FatherRank_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
FatherRank_effect$variable <- "FatherRank"

plot_FatherRank <- ggplot(data=FatherRank_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Father's Rank") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-0.2, 0.6),
                     breaks = c(0, 0.2, 0.4)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_FatherRank


grid.arrange(plot_branch, plot_exam, plot_FatherRank, nrow=1)



### SI Section 7: Total Effects of Family Background

plot_model <- feols(Rank_New ~ branch + 
                      branch:decade +
                      branch:I(decade^2) + 
                      Bieji | Modern.province^decade +
                      Pre.TangRegion_alt^decade,
                    data = People, cluster = c("decade"))    
results_plot_model <- summary(plot_model)

## get the x01 for the x-axis values in plots
x01 <- min(unique(People$decade), na.rm = T):
  max(unique(People$decade), na.rm = T)

## Branch Effect Over Time
dy.dx1 <- results_plot_model$coefficients["branch"] +
  results_plot_model$coefficients["branch:decade"] * x01 +
  results_plot_model$coefficients["branch:I(decade^2)"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["branch", "branch"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["branch:decade", "branch:decade"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["branch:I(decade^2)", "branch:I(decade^2)"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["branch:decade", "branch"] * 2*x01 +
                    results_plot_model$cov.scaled["branch", "branch:I(decade^2)"] * 2*x01^2 +
                    results_plot_model$cov.scaled["branch:decade", "branch:I(decade^2)"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

branch_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
branch_effect$variable <- "branch"

plot_branch <- ggplot(data=branch_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Prominent Branch") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-4.2, 8),
                     breaks = c(-2, 0, 2, 4, 6, 8)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_branch

##
plot_model <- feols(Rank_New ~ MarriageFull + 
                      MarriageFull:decade +
                      MarriageFull:I(decade^2) + 
                      Bieji | Modern.province^decade +
                      Pre.TangRegion_alt^decade,
                    data = People, cluster = c("decade"))    
results_plot_model <- summary(plot_model)

## get the x01 for the x-axis values in plots
x01 <- min(unique(People$decade), na.rm = T):
  max(unique(People$decade), na.rm = T)

## MarriageFull Effect Over Time
dy.dx1 <- results_plot_model$coefficients["MarriageFull"] +
  results_plot_model$coefficients["MarriageFull:decade"] * x01 +
  results_plot_model$coefficients["MarriageFull:I(decade^2)"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["MarriageFull", "MarriageFull"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["MarriageFull:decade", "MarriageFull:decade"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["MarriageFull:I(decade^2)", "MarriageFull:I(decade^2)"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["MarriageFull:decade", "MarriageFull"] * 2*x01 +
                    results_plot_model$cov.scaled["MarriageFull", "MarriageFull:I(decade^2)"] * 2*x01^2 +
                    results_plot_model$cov.scaled["MarriageFull:decade", "MarriageFull:I(decade^2)"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

MarriageFull_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
MarriageFull_effect$variable <- "MarriageFull"

plot_MarriageFull <- ggplot(data=MarriageFull_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Elite Marriage Networks") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-4.2, 8),
                     breaks = c(-2, 0, 2, 4, 6, 8)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_MarriageFull


grid.arrange(plot_branch, plot_MarriageFull, nrow=1)



### SI Section 8: Sensitivity analyses to sample selection
## Replications for Section 8 require additional external data. They are available upon request.


### SI Section 9: The validity of prominent branch as a measure of family pedigree
## Assuming 0.14 probability of becoming a bureaucrat for the whole sample

People$Ranked <- ifelse(People$Rank_New==0, 0,
                        ifelse(People$Rank_New>0, 1, NA))
summary(People$Ranked)


People$weights <- ifelse(People$Ranked == 1, 1/(mean(People$Ranked, na.rm = T)/0.14), 
                         1/( (1-mean(People$Ranked, na.rm = T))/(1-0.14) ))
People$weights <- People$weights/mean(People$weights, na.rm = T)
summary(People$weights)


######
plot_model <- feols(Rank_New ~ branch + 
                      branch:decade +
                      branch:I(decade^2) + 
                      Exam + 
                      Exam:decade +
                      Exam:I(decade^2) +
                      FRank +
                      FRank:decade + 
                      FRank:I(decade^2) +
                      GPRank +
                      GPRank:decade + 
                      GPRank:I(decade^2) +
                      MarriageFull + 
                      MarriageFull:decade +
                      MarriageFull:I(decade^2) +
                      Bieji | Modern.province^decade +
                      Pre.TangRegion_alt^decade,
                    data = People, cluster = c("decade"), weights = People$weights)    
results_plot_model <- summary(plot_model)

## get the x01 for the x-axis values in plots
x01 <- min(unique(People$decade), na.rm = T):
  max(unique(People$decade), na.rm = T)

## Branch Effect Over Time
dy.dx1 <- results_plot_model$coefficients["branch"] +
  results_plot_model$coefficients["branch:decade"] * x01 +
  results_plot_model$coefficients["branch:I(decade^2)"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["branch", "branch"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["branch:decade", "branch:decade"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["branch:I(decade^2)", "branch:I(decade^2)"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["branch:decade", "branch"] * 2*x01 +
                    results_plot_model$cov.scaled["branch", "branch:I(decade^2)"] * 2*x01^2 +
                    results_plot_model$cov.scaled["branch:decade", "branch:I(decade^2)"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

branch_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
branch_effect$variable <- "branch"

plot_branch <- ggplot(data=branch_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Prominent Branch") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-6.2, 9),
                     breaks = c(-4, -2, 0, 2, 4, 6, 8)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_branch


## Exam Effect Over Time
dy.dx1 <- results_plot_model$coefficients["Exam"] +
  results_plot_model$coefficients["decade:Exam"] * x01 +
  results_plot_model$coefficients["I(decade^2):Exam"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["Exam", "Exam"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["decade:Exam", "decade:Exam"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["I(decade^2):Exam", "I(decade^2):Exam"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["decade:Exam", "Exam"] * 2*x01 +
                    results_plot_model$cov.scaled["Exam", "I(decade^2):Exam"] * 2*x01^2 +
                    results_plot_model$cov.scaled["decade:Exam", "I(decade^2):Exam"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

exam_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
exam_effect$variable <- "exam"

plot_exam <- ggplot(data=exam_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Keju") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-9.7, 10.8),
                     breaks = c(-8, -4, 0,  4,  8)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_exam


### getting father effects in the same model
dy.dx1 <- results_plot_model$coefficients["FRank"] +
  results_plot_model$coefficients["decade:FRank"] * x01 +
  results_plot_model$coefficients["I(decade^2):FRank"] * (x01^2)

dy.dx1_se <- sqrt(results_plot_model$cov.scaled["FRank", "FRank"] + # V(\beta_{1})
                    results_plot_model$cov.scaled["decade:FRank", "decade:FRank"] * x01^2 + #x^2*V(\beta_{3})
                    results_plot_model$cov.scaled["I(decade^2):FRank", "I(decade^2):FRank"] * x01^4 + #x^4*V(\beta_{4})
                    results_plot_model$cov.scaled["decade:FRank", "FRank"] * 2*x01 +
                    results_plot_model$cov.scaled["FRank", "I(decade^2):FRank"] * 2*x01^2 +
                    results_plot_model$cov.scaled["decade:FRank", "I(decade^2):FRank"] * 2*x01^3)

upr1 <- dy.dx1 + qnorm(.975)*dy.dx1_se # upper bound of the 95 percent
lwr1 <- dy.dx1 - qnorm(.975)*dy.dx1_se # lower bound of the 95 percent

FatherRank_effect <- cbind.data.frame(x01, dy.dx1, lwr1, upr1)
FatherRank_effect$variable <- "FatherRank"

plot_FatherRank <- ggplot(data=FatherRank_effect, aes(x=x01, y=dy.dx1, ymax=upr1, ymin=lwr1)) + 
  geom_line() + geom_ribbon(alpha=0.2) +
  theme_bw() + 
  ylab("Coefficient") + xlab("Year of Birth") +
  ggtitle("Effect of Father's Rank") +
  geom_hline(yintercept=0, linetype="longdash", colour="red") +
  scale_x_continuous(breaks = c(550, 600, 650, 700, 750, 800, 850)) +
  scale_y_continuous(limits = c(-0.2, 0.6),
                     breaks = c(0, 0.2, 0.4)) +
  theme(plot.title = element_text(hjust = 0.5, face = "bold", size=20),
        axis.title.x = element_text(face = "bold", size=20),
        axis.title.y = element_text(face = "bold", size=20),
        plot.margin=unit(c(0.5, 0.5, 0.5, 0.5),"cm"),
        axis.text.x=element_text(face = "bold", size=18),
        axis.text.y=element_text(face = "bold", size=18)) 
plot_FatherRank


grid.arrange(plot_branch, plot_exam, plot_FatherRank, nrow=1,
             top = textGrob("Weighted by Proportion of Office Holders (p=0.14)", 
                            gp = gpar(fontsize = 20, fontface = "bold")))



### Correlation between Prominent Branch and Elite Marriage Network vs. 
### between Choronym and Elite Marriage Network

## zwt : Whether or not it is the mid- and late-Tang period (after 755 CE)
## sclaim : Whether one's choronym is specified in the epitaph. 

m00 <- feols(MarriageFull ~ 
               branch + 
               branch:zwt + 
               sclaim + 
               sclaim:zwt|
               Modern.province +
               decade, data = People)
summary(m00, cluster = c("decade"))
results_m00 <- summary(m00, cluster = c("decade"))
##
main_branch <- results_m00$coefficients["branch"]
se_branch <- results_m00$se["branch"]
lower_main_branch <- main_branch - qnorm(.975) * se_branch
higher_main_branch <- main_branch + qnorm(.975) * se_branch

dy.dx1_branch <- results_m00$coefficients["branch"] + 
  results_m00$coefficients["branch:zwt"]  
se.dy.dx1_branch <- 
  sqrt(results_m00$cov.scaled["branch", "branch"] + 
         results_m00$cov.scaled["branch:zwt", 
                                "branch:zwt"] + 
         2*results_m00$cov.scaled["branch", "branch:zwt"])
lower_dx_branch <- dy.dx1_branch - qnorm(.975) * se.dy.dx1_branch
higher_dx_branch <- dy.dx1_branch + qnorm(.975) * se.dy.dx1_branch
##
main_sclaim <- results_m00$coefficients["sclaim"]
se_sclaim <- results_m00$se["sclaim"]
lower_main_sclaim <- main_sclaim - qnorm(.975) * se_sclaim
higher_main_sclaim <- main_sclaim + qnorm(.975) * se_sclaim

dy.dx1_sclaim <- results_m00$coefficients["sclaim"] + 
  results_m00$coefficients["zwt:sclaim"]  
se.dy.dx1_sclaim <- 
  sqrt(results_m00$cov.scaled["sclaim", "sclaim"] + 
         results_m00$cov.scaled["zwt:sclaim", 
                                "zwt:sclaim"] + 
         2*results_m00$cov.scaled["sclaim", "zwt:sclaim"])
lower_dx_sclaim <- dy.dx1_sclaim - qnorm(.975) * se.dy.dx1_sclaim
higher_dx_sclaim <- dy.dx1_sclaim + qnorm(.975) * se.dy.dx1_sclaim



par(mar = c(5, 5, 3 , 1))
plot(0, xaxt = 'n', pch = '', ylab = '', xlab = '',
     xlim = c(0.3, .9),
     ylim = c(-.15, 1),
     main = "Correlation between Prominent Branch and Elite Marriage Network vs. 
     between Choronym and Elite Marriage Network")

lines(c(.35, .35), c(lower_main_branch, higher_main_branch))
lines(c(.45, .45), c(lower_main_sclaim, higher_main_sclaim))
lines(c(.75, .75), c(lower_dx_branch, higher_dx_branch))
lines(c(.85, .85), c(lower_dx_sclaim, higher_dx_sclaim))
points(.35, main_branch, pch = 19, cex = 1)
points(.45, main_sclaim, pch = 6, cex = 1)
points(.75, dy.dx1_branch, pch = 19, cex = 1)
points(.85, dy.dx1_sclaim, pch = 6, cex = 1)

abline(h = 0, lty = 2)
text(.35, higher_main_branch + .05, "Prominent Branch as \n a Measure of Pedigree")
text(.45, higher_main_sclaim + .05, "Choronym as \n a Measure of Pedigree")
text(.75, higher_dx_branch + .05, "Prominent Branch as \n a Measure of Pedigree")
text(.85, higher_dx_sclaim + .05, "Choronym as \n a Measure of Pedigree")
axis(side = 1, at = c(.4, .8),
     line = 1, tick = FALSE, 
     labels = c("Before 755 CE", "After 755 CE"))



### Table S1. Descriptive statistics on key variables for all Tang years and before and after the An Lushan Rebellion (755 CE)

## Before An Lushan vs. After An Lushan
Pre_ALS <- subset(People, People$After_ALS==0)
Post_ALS <- subset(People, People$After_ALS==1)


full <- People[, c("branch", "FRank", "Exam",  "Rank_New", "GPRank", "MarriageFull", 
                   "Bieji", "Modern.province", "Pre.TangRegion_alt", "decade")]
full <- na.omit(full)
full <- full[, 1:4]

Pre <- Pre_ALS[, c("branch", "FRank", "Exam",  "Rank_New", "GPRank", "MarriageFull", 
                   "Bieji", "Modern.province", "Pre.TangRegion_alt", "decade")]
Pre <- na.omit(Pre)
Pre <- Pre[, 1:4]

Post <- Post_ALS[, c("branch", "FRank", "Exam",  "Rank_New", "GPRank", "MarriageFull", 
                     "Bieji", "Modern.province", "Pre.TangRegion_alt", "decade")]
Post <- na.omit(Post)
Post <- Post[, 1:4]


descriptive_mean <- as.data.frame(apply(full, 2, function (x) mean(x, na.rm = T)))
descriptive_sd <- as.data.frame(apply(full, 2, function (x) sd(x, na.rm = T)))
descriptive_min <- as.data.frame(apply(full, 2, function (x) min(x, na.rm = T)))
descriptive_max <- as.data.frame(apply(full, 2, function (x) max(x, na.rm = T)))
descriptive_n <- as.data.frame(apply(full, 2, function (x) length(x[!is.na(x)])))
descriptive_full <- cbind.data.frame(round(descriptive_mean, digits=3), 
                                     round(descriptive_sd, digits=3), 
                                     round(descriptive_min, digits=3), 
                                     round(descriptive_max, digits=3),
                                     descriptive_n)

colnames(descriptive_full) <- c("Mean", "SD", "Min", "Max", "N")

##########
descriptive_mean <- as.data.frame(apply(Pre, 2, function (x) mean(x, na.rm = T)))
descriptive_sd <- as.data.frame(apply(Pre, 2, function (x) sd(x, na.rm = T)))
descriptive_min <- as.data.frame(apply(Pre, 2, function (x) min(x, na.rm = T)))
descriptive_max <- as.data.frame(apply(Pre, 2, function (x) max(x, na.rm = T)))
descriptive_n <- as.data.frame(apply(Pre, 2, function (x) length(x[!is.na(x)])))
descriptive_Pre <- cbind.data.frame(round(descriptive_mean, digits=3), 
                                    round(descriptive_sd, digits=3), 
                                    round(descriptive_min, digits=3), 
                                    round(descriptive_max, digits=3),
                                    descriptive_n)

colnames(descriptive_Pre) <- c("Mean", "SD", "Min", "Max", "N")


##########
descriptive_mean <- as.data.frame(apply(Post, 2, function (x) mean(x, na.rm = T)))
descriptive_sd <- as.data.frame(apply(Post, 2, function (x) sd(x, na.rm = T)))
descriptive_min <- as.data.frame(apply(Post, 2, function (x) min(x, na.rm = T)))
descriptive_max <- as.data.frame(apply(Post, 2, function (x) max(x, na.rm = T)))
descriptive_n <- as.data.frame(apply(Post, 2, function (x) length(x[!is.na(x)])))
descriptive_Post <- cbind.data.frame(round(descriptive_mean, digits=3), 
                                     round(descriptive_sd, digits=3), 
                                     round(descriptive_min, digits=3), 
                                     round(descriptive_max, digits=3),
                                     descriptive_n)

colnames(descriptive_Post) <- c("Mean", "SD", "Min", "Max", "N")


descriptive_table <- cbind.data.frame(descriptive_full, descriptive_Pre, descriptive_Post)
descriptive_table


